Here, we will cluster the tumoral cells for case 102.
library(Seurat)
library(Signac)
library(SLOcatoR)
library(EnsDb.Hsapiens.v86)
library(ggpubr)
library(tidyverse)
library(pals)
library(openxlsx)
library(here)
#library(presto)
library(DT)
set.seed(173)
path_to_obj <- here("MCL/results/R_objects/6.seurat_tumoral_102.rds")
path_to_save_xlsx <- here("MCL/3-clustering/tmp/7.seurat_tumoral_102_clustered_markers.xlsx")
path_to_save_obj <- here("MCL/results/R_objects/7.seurat_tumoral_102_clustered.rds")
path_to_save_shiny_metadata <- here("MCL/results/R_objects/7.seurat_tumoral_102_clustered_shiny_metadata.rds")
path_to_save_shiny_expression <- here("MCL/results/R_objects/7.seurat_tumoral_102_clustered_shiny_expression.rds")
# Colors
color_palette <- c("#E6194B", "#3CB44B", "#FFD8B1", "#4363D8", "#F58231",
"#911EB4", "#46F0F0", "#F032E6", "#BCF60C", "#FABEBE",
"#008080", "#E6BEFF", "#9A6324", "#FFFAC8", "#800000",
"#AAFFC3", "#808000", "#FFE119", "#000075", "#808080",
"#000000", "tan", "darkgrey")
# Source functions
source(here::here("scRNA-seq/bin/utils.R"))
# Thresholds
chrY_cutoff <- -0.32
seurat <- readRDS(path_to_obj)
# seurat[["ATAC"]] <- NULL
Let us score each cell for cell cycling singatures:
seurat <- CellCycleScoring(
seurat,
s.features = cc.genes$s.genes,
g2m.features = cc.genes$g2m.genes
)
FeaturePlot(seurat, c("G2M.Score", "S.Score")) &
scale_color_viridis_c(option = "magma")
Since we subsetted the CD79A+ cells, let us rerun the general pipeline for dimensionality reduction:
seurat <- seurat %>%
FindVariableFeatures() %>%
ScaleData() %>%
RunPCA() %>%
RunUMAP(dims = 1:20, reduction = "pca")
DimPlot(seurat, cols = color_palette, reduction = "umap")
Exploring our data, we have observed that the two main “blobs” might be two subclones, one chrY+ and the other chrY. Loss of chromosome Y is a known feature of MCL:
FeaturePlot(
seurat,
features = c("UTY", "KDM5D", "DDX3Y", "USP9Y", "ZFY", "EIF1AY"),
reduction = "umap"
) &
scale_color_viridis_c(option = "magma")
Let us convert the expression of all genes located in chrY in a single score:
# annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86)
# seqlevelsStyle(annotations) <- "UCSC"
# annotations_chrY <- annotations[annotations@seqnames == "chrY", "gene_name"]
# goi_chrY <- unique(annotations_chrY$gene_name)
goi_chrY <- c("UTY", "KDM5D", "DDX3Y", "USP9Y", "ZFY", "EIF1AY")
seurat <- AddModuleScore(seurat, features = list(goi_chrY), name = "chrY_score")
FeaturePlot(seurat, "chrY_score1", reduction = "umap")
(density_gg <- seurat@meta.data %>%
ggplot(aes(chrY_score1)) +
geom_density() +
geom_vline(xintercept = chrY_cutoff, linetype = "dashed", color = "darkblue") +
theme_classic())
seurat$has_loss_chrY <- ifelse(
seurat$chrY_score1 > chrY_cutoff,
"chrY+",
"chrY-"
)
DimPlot(seurat, group.by = "has_loss_chrY", reduction = "umap")
seurat <- FindNeighbors(seurat, dims = 1:20, reduction = "pca")
seurat <- FindClusters(seurat, resolution = 0.15)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 21613
## Number of edges: 620624
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9161
## Number of communities: 5
## Elapsed time: 7 seconds
DimPlot(seurat, cols = color_palette, reduction = "umap")
FeaturePlot(seurat, c("CD8A", "CD3D"), reduction = "umap", order = TRUE)
Let us subcluster and eliminate residual T cells:
seurat <- FindSubCluster(
seurat,
cluster = "3",
graph.name = "RNA_snn",
subcluster.name = "T_cells",
resolution = 0.25
)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 759
## Number of edges: 23680
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8321
## Number of communities: 4
## Elapsed time: 0 seconds
DimPlot(seurat, group.by = "T_cells", reduction = "umap")
Idents(seurat) <- "T_cells"
markers_t <- FindMarkers(seurat, ident.1 = "3_3", ident.2 = c("3_0", "3_1", "3_2"))
head(markers_t, 20)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## BCL11B 9.990807e-123 2.0916737 0.813 0.009 3.656735e-118
## CD247 8.137826e-88 1.5238327 0.626 0.011 2.978526e-83
## ITK 3.771045e-80 1.4004410 0.636 0.023 1.380240e-75
## IL32 2.448993e-78 2.0940067 0.673 0.037 8.963557e-74
## IKZF2 3.244255e-77 1.7480791 0.523 0.005 1.187430e-72
## INPP4B 3.729918e-77 1.1971904 0.542 0.008 1.365187e-72
## PAM 4.055698e-77 1.4991071 0.505 0.002 1.484426e-72
## TOX 3.932509e-74 2.3032859 0.701 0.051 1.439338e-69
## GPRIN3 2.275297e-72 1.3657375 0.617 0.029 8.327816e-68
## CD96 1.573863e-69 1.0592217 0.514 0.011 5.760495e-65
## CD3D 7.541885e-69 1.6501220 0.523 0.014 2.760405e-64
## THEMIS 3.514993e-68 1.7991109 0.439 0.000 1.286522e-63
## CD3G 3.957202e-68 1.1113803 0.449 0.002 1.448376e-63
## PRKCH 1.412496e-67 1.9869919 0.804 0.107 5.169877e-63
## ICOS 2.950589e-67 1.3054423 0.514 0.014 1.079945e-62
## GZMA 1.252004e-66 2.0757080 0.449 0.003 4.582459e-62
## CAMK4 1.443265e-63 1.4433147 0.477 0.011 5.282494e-59
## CLEC2B 1.468662e-63 1.0465479 0.477 0.011 5.375449e-59
## PRKCQ 2.408908e-63 0.9887146 0.421 0.002 8.816844e-59
## CD2 2.340802e-62 1.0820466 0.458 0.009 8.567570e-58
seurat <- subset(seurat, T_cells != "3_3")
DimPlot(seurat, reduction = "umap")
From our exploratory analysis we know that cluster 4 contains mostly poor-quality cells:
markers_4 <- FindMarkers(seurat, ident.1 = "4", only.pos = TRUE, logfc.threshold = 0.65)
head(markers_4, 20)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## MT-ND3 9.038398e-49 1.6067792 1.000 0.999 3.308144e-44
## MT-ATP6 8.779576e-48 1.4228569 1.000 0.999 3.213412e-43
## MT-ND2 3.636264e-46 1.5257671 1.000 0.998 1.330909e-41
## MT-CO2 5.205496e-46 1.5097712 1.000 0.999 1.905263e-41
## MT-CYB 1.907164e-45 1.4070283 1.000 0.999 6.980412e-41
## MT-ND1 2.151395e-45 1.4474213 1.000 0.998 7.874322e-41
## MT-CO3 9.533054e-45 1.5571124 1.000 0.999 3.489193e-40
## MT-ND4 7.545927e-43 1.3978481 1.000 0.998 2.761885e-38
## MT-CO1 1.959008e-41 1.2642564 1.000 0.999 7.170165e-37
## MT-ND4L 7.564680e-27 1.3755301 0.987 0.978 2.768748e-22
## MT-ND5 1.487207e-26 1.1285346 1.000 0.997 5.443327e-22
## MTRNR2L12 6.355171e-23 1.6450997 0.960 0.993 2.326056e-18
## MT-ATP8 8.659065e-20 1.1872892 0.973 0.987 3.169305e-15
## MTRNR2L8 9.326897e-11 1.4819400 0.720 0.750 3.413738e-06
## RPS27 4.351873e-04 0.8201465 0.760 0.918 1.000000e+00
## TTN 9.411678e-03 0.7117078 0.293 0.605 1.000000e+00
## AHNAK 2.042260e-01 0.7500239 0.347 0.601 1.000000e+00
## MT-ND6 3.458526e-01 0.7782676 0.413 0.553 1.000000e+00
## MTRNR2L1 3.525257e-01 0.6929666 0.107 0.086 1.000000e+00
## NFKBID 4.396542e-01 0.7449041 0.240 0.369 1.000000e+00
seurat <- subset(seurat, seurat_clusters != "4")
We observe remarkable heterogeneity. Thus, let us increase the clustering resolution:
seurat <- FindNeighbors(seurat, dims = 1:20, reduction = "pca")
seurat <- FindClusters(seurat, resolution = 0.7)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 21431
## Number of edges: 616785
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8213
## Number of communities: 12
## Elapsed time: 6 seconds
DimPlot(seurat, reduction = "umap")
#markers <- wilcoxauc(
# seurat,
# group_by = "seurat_clusters",
# seurat_assay = "RNA",
# y = levels(seurat$seurat_clusters)
#)
#markers <- markers[markers$padj < 0.001 & markers$logFC > 0.5, ]
#DT::datatable(markers)
# Dot plot
goi <- c("MT2A", "MT1E", "IGF2BP3", "RGS12", "RUBCNL",
"RGS7", "LRRFIP1", "BANK1", "ZNF804A", "PLCL2",
"RGS1", "TFEC", "CD69", "BTG2", "SIK3", "ECE1",
"CD58", "KIF26B", "IRF4", "HDAC9", "TRAF1", "MIR155HG",
"PCNA", "TOP2A", "STMN1", "CCDC88A", "NCL", "NFKB1",
"CCL3", "CCL4", "CD55", "GALNT2", "IRAK2", "AFF3",
"CXCR4")
DotPlot(seurat, features = rev(goi)) +
coord_flip()
Exploring our data, we have observed that the two main “blobs” might be two subclones, one chrY+ and the other chrY. Loss of chromosome Y is a known feature of MCL:
FeaturePlot(
seurat,
features = c("UTY", "KDM5D", "DDX3Y", "USP9Y", "ZFY", "EIF1AY"),
reduction = "umap"
) &
scale_color_viridis_c(option = "magma")
We will explore this later with infercnv but, for now, we will try to find “mirror” clusters in both subclones:
seurat <- FindClusters(seurat, resolution = 0.1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 21431
## Number of edges: 616785
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9345
## Number of communities: 3
## Elapsed time: 6 seconds
DimPlot(seurat, reduction = "umap")
# chrY-
Idents(seurat) <- "seurat_clusters"
seurat <- FindSubCluster(
seurat,
cluster = "1",
graph.name = "RNA_snn",
subcluster.name = "chrYneg",
resolution = 0.2
)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5643
## Number of edges: 171371
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8699
## Number of communities: 4
## Elapsed time: 0 seconds
DimPlot(seurat, group.by = "chrYneg", reduction = "umap")
Idents(seurat) <- "chrYneg"
markers_1 <- FindAllMarkers(seurat, only.pos = TRUE, logfc.threshold = 0.8)
markers_1_1 <- FindMarkers(
seurat,
ident.1 = "1_1",
ident.2 = c("1_2", "1_0"),
only.pos = TRUE,
logfc.threshold = 0.75
)
markers_1_0 <- FindMarkers(
seurat,
ident.1 = "1_0",
ident.2 = c("1_2", "1_1"),
only.pos = TRUE,
logfc.threshold = 0.75
)
markers_1_2 <- FindMarkers(
seurat,
ident.1 = "1_2",
ident.2 = c("1_0", "1_1"),
only.pos = TRUE,
logfc.threshold = 0.75
)
DT::datatable(markers_1_0, options = list(scrollX = TRUE))
DT::datatable(markers_1_1, options = list(scrollX = TRUE))
DT::datatable(markers_1_2, options = list(scrollX = TRUE))
seurat_chrYneg <- subset(seurat, idents = c("1_0", "1_1", "1_2"))
DotPlot(
seurat_chrYneg,
features = rev(c("MT2A", "MT1X", "MT1G", "INPP5A", "GAB2",
"FOS", "JUN", "CD69", "MARCH1", "BANK1",
"MIR155HG", "TRAF1", "NFKB1", "NFAT5", "MYC", "IL2RA"))) +
coord_flip()
# chrY+
Idents(seurat) <- "seurat_clusters"
seurat <- FindSubCluster(
seurat,
cluster = "0",
graph.name = "RNA_snn",
subcluster.name = "chrYpos",
resolution = 0.15
)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 15139
## Number of edges: 414716
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8852
## Number of communities: 4
## Elapsed time: 3 seconds
DimPlot(seurat, group.by = "chrYpos", reduction = "umap")
Idents(seurat) <- "chrYpos"
markers_0_0 <- FindMarkers(
seurat,
ident.1 = "0_0",
ident.2 = c("0_1", "0_2", "0_3"),
only.pos = TRUE,
logfc.threshold = 0.75
)
markers_0_1 <- FindMarkers(
seurat,
ident.1 = "0_1",
ident.2 = c("0_0", "0_2", "0_3"),
only.pos = TRUE,
logfc.threshold = 0.4
)
markers_0_2 <- FindMarkers(
seurat,
ident.1 = "0_2",
ident.2 = c("0_0", "0_1", "0_3"),
only.pos = TRUE,
logfc.threshold = 0.6
)
markers_0_3 <- FindMarkers(
seurat,
ident.1 = "0_3",
ident.2 = c("0_0", "0_1", "0_2"),
only.pos = TRUE,
logfc.threshold = 0.6
)
DT::datatable(markers_0_0, options = list(scrollX = TRUE))
DT::datatable(markers_0_1, options = list(scrollX = TRUE))
DT::datatable(markers_0_2, options = list(scrollX = TRUE))
DT::datatable(markers_0_3, options = list(scrollX = TRUE))
seurat_chrYpos <- subset(seurat, idents = c("0_0", "0_1", "0_2", "0_3"))
seurat_chrYpos$chrYpos <- factor(seurat_chrYpos$chrYpos, levels = c("0_0", "0_1", "0_2", "0_3"))
DotPlot(
seurat_chrYpos,
group.by = "chrYpos",
features = rev(c("MT1X", "MT2A", "MT1G",
"GALNTL6", "BANK1", "RGS7",
"JUN", "FOS", "CD69", "RGS1",
"MIR155HG", "NFKB1", "NFKBIZ", "TRAF1", "IL2RA", "CD44"))) +
coord_flip()
seurat$annotation_20220518 <- case_when(
seurat$chrYneg == "1_0" ~ "chrY- metallothionein",
seurat$chrYneg == "1_1" ~ "chrY- CD69+AP1+",
seurat$chrYneg == "1_2" ~ "chrY- MIR155HG+NFKB1+MYC+",
seurat$chrYneg == "1_3" ~ "non-tumoral B-cells",
seurat$chrYneg == "2" ~ "Cycling tumoral",
seurat$chrYpos == "0_0" ~ "chrY+ metallothionein",
seurat$chrYpos == "0_1" ~ "chrY+ RGS7+BANK1+",
seurat$chrYpos == "0_2" ~ "chrY+ CD69+AP1+",
seurat$chrYpos == "0_3" ~ "chrY+ MIR155HG+NFKB1+MYC+"
)
colors <- pals::polychrome()
names(colors) <- NULL
DimPlot(
seurat,
group.by = "annotation_20220518",
cols = colors,
reduction = "umap"
)
Prognostic significance of metallothionein in B-cell lymphomas # Save
#openxlsx::write.xlsx(list(markers = markers), file = path_to_save_xlsx)
saveRDS(seurat, path_to_save_obj)
Save input to shiny app:
# input_shiny <- seurat2shiny(
# seurat,
# assay = "RNA",
# slot = "data",
# reduction = "umap"
# )
# saveRDS(input_shiny$metadata, path_to_save_shiny_metadata)
# saveRDS(input_shiny$expression, path_to_save_shiny_expression)
sessionInfo()
## R version 4.2.0 (2022-04-22)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=es_ES.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=es_ES.UTF-8 LC_MESSAGES=en_US.UTF-8 LC_PAPER=es_ES.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] DT_0.22 here_1.0.1 openxlsx_4.2.5 pals_1.7 forcats_0.5.1 stringr_1.4.0 dplyr_1.0.8 purrr_0.3.4 readr_2.1.2 tidyr_1.2.0 tibble_3.1.6 tidyverse_1.3.1 ggpubr_0.4.0 ggplot2_3.3.5 EnsDb.Hsapiens.v86_2.99.0 ensembldb_2.19.10 AnnotationFilter_1.19.0 GenomicFeatures_1.47.14 AnnotationDbi_1.57.1 Biobase_2.55.2 GenomicRanges_1.47.6 GenomeInfoDb_1.31.10 IRanges_2.29.1 S4Vectors_0.33.17 BiocGenerics_0.41.2 SLOcatoR_0.0.0.9000 Signac_1.6.0 SeuratObject_4.0.4 Seurat_4.1.0 BiocStyle_2.23.1
##
## loaded via a namespace (and not attached):
## [1] rappdirs_0.3.3 SnowballC_0.7.0 rtracklayer_1.55.4 scattermore_0.8 ModelMetrics_1.2.2.2 bit64_4.0.5 knitr_1.38 irlba_2.3.5 DelayedArray_0.21.2 data.table_1.14.2 rpart_4.1.16 KEGGREST_1.35.0 hardhat_0.2.0 RCurl_1.98-1.6 generics_0.1.2 cowplot_1.1.1 RSQLite_2.2.12 RANN_2.6.1 future_1.25.0 tzdb_0.3.0 bit_4.0.4 spatstat.data_2.2-0 xml2_1.3.3 lubridate_1.8.0 httpuv_1.6.5 SummarizedExperiment_1.25.3 assertthat_0.2.1 gower_1.0.0 xfun_0.30 hms_1.1.1 jquerylib_0.1.4 evaluate_0.15 promises_1.2.0.1 fansi_1.0.3 restfulr_0.0.13 progress_1.2.2 readxl_1.4.0 dbplyr_2.1.1 igraph_1.3.1 DBI_1.1.2 htmlwidgets_1.5.4 sparsesvd_0.2
## [43] spatstat.geom_2.4-0 ellipsis_0.3.2 crosstalk_1.2.0 RSpectra_0.16-1 backports_1.4.1 bookdown_0.26 biomaRt_2.51.4 deldir_1.0-6 MatrixGenerics_1.7.0 vctrs_0.4.1 ROCR_1.0-11 abind_1.4-5 caret_6.0-92 cachem_1.0.6 withr_2.5.0 ggforce_0.3.3 sctransform_0.3.3 GenomicAlignments_1.31.2 prettyunits_1.1.1 goftest_1.2-3 cluster_2.1.3 lazyeval_0.2.2 crayon_1.5.1 labeling_0.4.2 recipes_0.2.0 pkgconfig_2.0.3 slam_0.1-50 tweenr_1.0.2 nlme_3.1-157 ProtGenerics_1.27.2 nnet_7.3-17 rlang_1.0.2 globals_0.14.0 lifecycle_1.0.1 miniUI_0.1.1.1 filelock_1.0.2 BiocFileCache_2.3.5 modelr_0.1.8 dichromat_2.0-0 rprojroot_2.0.3 cellranger_1.1.0 polyclip_1.10-0
## [85] matrixStats_0.62.0 lmtest_0.9-40 Matrix_1.4-1 ggseqlogo_0.1 carData_3.0-5 zoo_1.8-10 reprex_2.0.1 ggridges_0.5.3 png_0.1-7 viridisLite_0.4.0 rjson_0.2.21 bitops_1.0-7 KernSmooth_2.23-20 pROC_1.18.0 Biostrings_2.63.3 blob_1.2.3 parallelly_1.31.1 spatstat.random_2.2-0 rstatix_0.7.0 ggsignif_0.6.3 scales_1.2.0 memoise_2.0.1 magrittr_2.0.3 plyr_1.8.7 ica_1.0-2 zlibbioc_1.41.0 compiler_4.2.0 BiocIO_1.5.0 RColorBrewer_1.1-3 fitdistrplus_1.1-8 Rsamtools_2.11.0 cli_3.3.0 XVector_0.35.0 listenv_0.8.0 patchwork_1.1.1 pbapply_1.5-0 MASS_7.3-56 mgcv_1.8-40 tidyselect_1.1.2 stringi_1.7.6 highr_0.9 yaml_2.3.5
## [127] ggrepel_0.9.1 grid_4.2.0 sass_0.4.1 fastmatch_1.1-3 tools_4.2.0 future.apply_1.9.0 parallel_4.2.0 rstudioapi_0.13 foreach_1.5.2 lsa_0.73.2 gridExtra_2.3 prodlim_2019.11.13 farver_2.1.0 Rtsne_0.16 digest_0.6.29 BiocManager_1.30.17 shiny_1.7.1 lava_1.6.10 qlcMatrix_0.9.7 Rcpp_1.0.8.3 car_3.0-12 broom_0.8.0 later_1.3.0 harmony_0.1.0 RcppAnnoy_0.0.19 httr_1.4.2 colorspace_2.0-3 rvest_1.0.2 fs_1.5.2 XML_3.99-0.9 tensor_1.5 reticulate_1.24 splines_4.2.0 uwot_0.1.11 RcppRoll_0.3.0 spatstat.utils_2.3-0 mapproj_1.2.8 plotly_4.10.0 xtable_1.8-4 jsonlite_1.8.0 timeDate_3043.102 ipred_0.9-12
## [169] R6_2.5.1 pillar_1.7.0 htmltools_0.5.2 mime_0.12 glue_1.6.2 fastmap_1.1.0 BiocParallel_1.29.21 class_7.3-20 codetools_0.2-18 maps_3.4.0 utf8_1.2.2 lattice_0.20-45 bslib_0.3.1 spatstat.sparse_2.1-1 curl_4.3.2 leiden_0.3.9 zip_2.2.0 limma_3.51.8 survival_3.3-1 rmarkdown_2.14 docopt_0.7.1 munsell_0.5.0 GenomeInfoDbData_1.2.8 iterators_1.0.14 haven_2.5.0 reshape2_1.4.4 gtable_0.3.0 spatstat.core_2.4-2